/*********************************************
This program takes the raw files from each system and
systematizes residence measures

INPUTS: Raw enrollment/demographic files
OUTPUTS: residency
Interleaved by PIK OPEID
**********************************************/


/*********** COLORADO ******************/
/* Read the CDHE institution-code crosswalk (CSV, header row skipped) and
   derive an opeid string plus a public/non-public indicator.
   Only opeid, InstitutionCode, funding_type and public are written out. */
data cdhe_codes (keep=opeid InstitutionCode funding_type public);
    infile "&codepath./cdhe_institutioncodes.csv"
        missover dsd delimiter=',' firstobs=2;

    /* FORMAT precedes INPUT, so these variables are first defined here */
    format opeid_short     $6.
           InstitutionCode $4.
           school_name     $80.
           school_type     $20.
           funding_type    $20.;

    input opeid_short $ InstitutionCode $ school_name $ school_type $ funding_type $;

    /* Discard rows that carry no OPEID */
    if missing(opeid_short) then delete;

    /* 1 when funding_type starts with 'Public' (case-sensitive), else 0 */
    public = (substr(funding_type,1,6) = 'Public');

    /* Append '00' to the short code to form the full opeid */
    opeid = opeid_short || '00';
run;
        
/* Sort the code crosswalk by institution code so it can be merged onto the
   enrollment records. The dataset is named explicitly instead of relying on
   the implicit _LAST_ dataset, which silently changes if a step is ever
   inserted above this one. */
proc sort data=cdhe_codes;
    by institutioncode;
run;
    
/* Stack the two Colorado enrollment extracts (1997-2007 and 2008-2016)
   into a single work dataset. */
data co_enr;
    set INPUTS.coedu_enrollment_1997_2007_qpik
        INPUTS.coedu_enrollment_2008_2016_qpik;
run;

/* Order the stacked records by institution code */
proc sort data=co_enr out=co_enr;
    by institutioncode;
run;
        
        
/* Attach opeid from the CDHE crosswalk to every Colorado enrollment record.
   Both inputs are sorted by institutioncode before this step. The BY
   statement is required: without it SAS performs a one-to-one merge that
   pairs the Nth enrollment row with the Nth crosswalk row, assigning
   unrelated opeid values. */
data co_enr (keep=pik opeid reportyear reportterm);
    merge co_enr (in=in_enr) cdhe_codes;
    by institutioncode;

    /* Keep enrollment records only; crosswalk rows for institutions with no
       enrollment would otherwise appear with a missing pik. */
    if in_enr;
run;
        
/* Re-order enrollment by person identifier ahead of the merge with the
   demographic records. */
proc sort data=co_enr out=co_enr;
    by pik;
run;
    
    
/* Stack the two Colorado demographic extracts and flag residents:
   resident is 1 when tuitionclassification equals 1, otherwise 0
   (a missing tuitionclassification therefore also yields 0). */
data co_res;
    set INPUTS.coedu_demo_1_1997_2016_qpik
        INPUTS.coedu_demo_2_1997_2016_qpik;

    if tuitionclassification = 1 then resident = 1;
    else resident = 0;
run;

/* Order by person identifier for the merge with enrollment */
proc sort data=co_res out=co_res;
    by pik;
run;
        
data co_res ;
        merge co_enr (in=a) co_res (in=b) ;
        by pik;
        
        if b; 
run;
        
/* Order chronologically within person and institution */
proc sort data=co_res out=co_res;
        by pik opeid reportyear reportterm;
run;

/* Keep one row per pik/opeid: the first one in reportyear/reportterm order */
data co_res;
        set co_res;
        by pik opeid reportyear reportterm;

        if first.opeid;

        keep pik opeid resident;
run;

/********* CUNY - NEW YORK ***************/
/* Join the CUNY identifier file to the research file on dqb_source_id and
   flag residents (instate equal to 1). qpik_ssn is written out as pik.
   NOTE(review): no sort precedes this merge - presumably both inputs are
   already stored in dqb_source_id order; confirm. */
data cuny_res;
    merge INPUTS.pseo_ny_cuny_2001_2020_enr_qpik
          INPUTS.pseo_ny_cuny_2001_2020_enr_rsch;
    by dqb_source_id;

    if instate = 1 then resident = 1;
    else resident = 0;

    rename qpik_ssn = pik;
run;

/* Order chronologically within person and institution */
proc sort data=cuny_res out=cuny_res;
    by pik opeid yearenr semenr;
run;

/* Keep the first record for each pik/opeid pair */
data cuny_res;
    set cuny_res;
    by pik opeid yearenr semenr;

    if first.opeid;

    keep pik opeid resident;
run;
        
/***************** Ohio Department of Higher Education - ODHE **************/
        
/* Sorted work copies of the two Ohio inputs, keyed on dqb_source_id,
   so they can be match-merged in the next step. */
proc sort
     data=INPUTS.pseo_oh_odhe_2002_2019a_enr_rsch
     out=odhe_rsch;
     by dqb_source_id;
run;

proc sort
     data=INPUTS.pseo_oh_odhe_2002_2019a_enr_qpik
     out=odhe_pik;
     by dqb_source_id;
run;
        
/* Join the Ohio research and identifier files on dqb_source_id, convert the
   numeric opeid to character, and flag residents. qpik_ssn is written out
   as pik. */
data odhe_res_test (rename=(qpik_ssn = pik)) ;
     merge odhe_rsch (rename=(opeid=opeid_num)) odhe_pik ;
     by dqb_source_id;

     /* Z8. zero-pads to eight characters. The plain 8. format left-pads with
        blanks, which yields a key such as '  309000' instead of '00309000'
        and will not match the zero-padded 8-character opeid strings built
        for other systems (e.g. Colorado's opeid_short||'00'). */
     opeid = put(opeid_num, z8.) ;

     /* 1 when residency_status is exactly "State_resident", else 0 */
     resident = (residency_status = "State_resident") ;

run;

/* Order chronologically within person and institution */
proc sort data=odhe_res_test out=odhe_res_test;
     by pik opeid calendar_year term;
run;

/* Keep the first record for each pik/opeid pair */
data odhe_res;
    set odhe_res_test;
    by pik opeid calendar_year term;

    if first.opeid;

    keep pik opeid resident;
run;

     
/**************** State Universities of New York (SUNY) ****************/
        
/* Join the SUNY identifier file to the research file on dqb_source_id and
   flag residents (residency_status equal to 1). qpik_ssn is written out as
   pik.
   NOTE(review): no sort precedes this merge - presumably both inputs are
   already stored in dqb_source_id order; confirm. */
data suny_res_test;
     merge INPUTS.pseo_ny_suny_2002_2020_enr_qpik
           INPUTS.pseo_ny_suny_2002_2020_enr_rsch;
     by dqb_source_id;

     if residency_status = 1 then resident = 1;
     else resident = 0;

     rename qpik_ssn = pik;
run;

/* Order chronologically within person and institution */
proc sort data=suny_res_test out=suny_res_test;
     by pik opeid year term;
run;

/* Keep the first record for each pik/opeid pair */
data suny_res;
     set suny_res_test;
     by pik opeid year term;

     if first.opeid;

     keep pik opeid resident;
run;

/**************** Texas Higher Education Coordinating Board ***********************/
        
/* Join the Texas identifier file to the research file on dqb_source_id and
   flag residents: enrres codes '1' and '3' are treated as resident.
   qpik_ssn is written out as pik.
   NOTE(review): no sort precedes this merge - presumably both inputs are
   already stored in dqb_source_id order; confirm. */
data thecb_res_test;
     merge INPUTS.pseo_tx_thecb_2000_2019_enr_qpik
           INPUTS.pseo_tx_thecb_2000_2019_enr_rsch;
     by dqb_source_id;

     if enrres in ('1','3') then resident = 1;
     else resident = 0;

     rename qpik_ssn = pik;
run;

/* Diagnostic: distribution of the raw enrres codes */
proc freq data=thecb_res_test;
        tables enrres;
run;

/* Order chronologically within person and institution */
proc sort data=thecb_res_test out=thecb_res_test;
     by pik opeid enryear enrsem;
run;

/* Keep the first record for each pik/opeid pair */
data thecb_res;
     set thecb_res_test;
     by pik opeid enryear enrsem;

     if first.opeid;

     keep pik opeid resident;
run;
    
/**************** Utah State Higher Education ***********************/
/* Join the Utah research and identifier files on dqb_source_id and flag
   residents (residency equal to 'R').
   NOTE(review): no sort precedes this merge - presumably both inputs are
   already stored in dqb_source_id order; confirm. */
data ushe_res_test;
     merge INPUTS.pseo_ut_ushe_2011_2020_enr_rsch
           INPUTS.pseo_ut_ushe_2011_2020_enr_pik;
     by dqb_source_id;

     if residency = 'R' then resident = 1;
     else resident = 0;
run;

/* Order chronologically within person and institution */
proc sort data=ushe_res_test out=ushe_res_test;
    by pik opeid year term;
run;

/* Keep the first record for each pik/opeid pair */
data ushe_res;
    set ushe_res_test;
    by pik opeid year term;

    if first.opeid;

    keep pik opeid resident;
run;
    
/*************** Minnesota Office of Higher Education ******************/
/* Join the Minnesota research and identifier files on dqb_source_id and
   flag residents (residency equal to 'in-state student').
   NOTE(review): no sort precedes this merge - presumably both inputs are
   already stored in dqb_source_id order; confirm. */
data mnohe_res_test;
     merge INPUTS.pseo_mn_ohe_2003_2021_enr_rsch
           INPUTS.pseo_mn_ohe_2003_2021_enr_pik;
     by dqb_source_id;

     if residency = 'in-state student' then resident = 1;
     else resident = 0;
run;

/* Order chronologically within person and institution */
proc sort data=mnohe_res_test out=mnohe_res_test;
    by pik opeid year term;
run;

/* Keep the first record for each pik/opeid pair */
data mnohe_res;
    set mnohe_res_test;
    by pik opeid year term;

    if first.opeid;

    keep pik opeid resident;
run;

/************************************
University System of Georgia
************************************/
/* Join the Georgia research and identifier files on dqb_source_id and flag
   residents (student_residency_code equal to 'R').
   NOTE(review): no sort precedes this merge - presumably both inputs are
   already stored in dqb_source_id order; confirm. */
data ga_res_test;
     merge INPUTS.pseo_ga_usg_2001_2022_enr_rsch
           INPUTS.pseo_ga_usg_2001_2022_enr_pik;
     by dqb_source_id;

     if student_residency_code = 'R' then resident = 1;
     else resident = 0;
run;

/* Order chronologically within person and institution */
proc sort data=ga_res_test out=ga_res_test;
     by pik opeid year term;
run;

/* Keep the first record for each pik/opeid pair */
data ga_res;
     set ga_res_test;
     by pik opeid year term;

     if first.opeid;

     keep pik opeid resident;
run;

/************************************
Virginia (SCHEV)
************************************/
/* Join the Virginia research and identifier files on dqb_source_id and flag
   residents (residency equal to 'I').
   NOTE(review): no sort precedes this merge - presumably both inputs are
   already stored in dqb_source_id order; confirm. */
data va_res_test;
     merge INPUTS.pseo_va_schev_2000_2022_enr_rsch
           INPUTS.pseo_va_schev_2000_2022_enr_pik;
     by dqb_source_id;

     if residency = 'I' then resident = 1;
     else resident = 0;
run;

/* Order chronologically within person and institution */
proc sort data=va_res_test out=va_res_test;
     by pik opeid year term;
run;

/* Keep the first record for each pik/opeid pair */
data va_res;
     set va_res_test;
     by pik opeid year term;

     if first.opeid;

     keep pik opeid resident;
run;


/***********************************
Missouri
************************************/
/* Join the Missouri research and identifier files on dqb_source_id and flag
   residents (residency equal to 'in-state').
   NOTE(review): no sort precedes this merge - presumably both inputs are
   already stored in dqb_source_id order; confirm. */
data mo_res_test;
     merge INPUTS.pseo_mo_mdhe_2005_2020a_enr_rsch
           INPUTS.pseo_mo_mdhe_2005_2020a_enr_pik;
     by dqb_source_id;

     if residency = 'in-state' then resident = 1;
     else resident = 0;
run;

/* Order chronologically within person and institution */
proc sort data=mo_res_test out=mo_res_test;
     by pik opeid year term;
run;

/* Keep the first record for each pik/opeid pair */
data mo_res;
    set mo_res_test;
    by pik opeid year term;

    if first.opeid;

    keep pik opeid resident;
run;

/* Diagnostics: print variable names, types and lengths for five of the
   per-system datasets so attribute mismatches can be spotted in the
   listing before the datasets are interleaved. */
proc contents data=co_res;    run;

proc contents data=thecb_res; run;

proc contents data=suny_res;  run;

proc contents data=cuny_res;  run;

proc contents data=odhe_res;  run;


/* Interleave the per-system residency datasets by pik opeid for ease of
   merging in the next step. Every input is already ordered by pik opeid
   because each was sorted on those keys before its first-record step.
   mnohe_res (Minnesota) is included here: it is built above exactly like
   the other systems but was missing from the SET list, so its records
   never reached the output.
   NOTE(review): confirm the Minnesota omission was not deliberate, and
   that opeid has the same type in mnohe_res as in the other inputs. */
data OUTPUTS.residency ;
        set co_res thecb_res suny_res cuny_res
            odhe_res ushe_res mnohe_res mo_res ga_res va_res;
        by pik opeid;
run;

        /* Diagnostic: frequency table of the resident flag in the final
           combined dataset */
        proc freq data=OUTPUTS.residency;
            tables resident;
        run;


/* Write the combined residency dataset to the &outstata. directory,
   replacing any existing file of the same name.
   NOTE(review): no DBMS= option is given, so the output type is presumably
   inferred from the .dta extension - confirm on the target SAS install. */
proc export
    data=OUTPUTS.residency
    outfile="&outstata./residency_pikopeid.dta"
    replace;
run;
    
    

